

*********************************
* This do-file constructs the data
* On 1/25/2019
*********************************

clear all

*************************
******** Paths **********
*************************

global  dir "..." // main path


**************************************
************ 1) Preliminaries ********
**************************************

* Load the raw enrollment records (original file is not under $dir):
use "...\full_data_infilled.dta", clear  // 223,000 individuals

* Delete if record not found
drop if record_found=="N" // 96,015 observations deleted
cap drop search_date

* Keep individuals whose earliest enrollment record is at a 4-year college.
* year_2_4 is compared as a string ("4") here and again in the transfer
* outcomes, so it is intentionally left as a string: the former
* "destring year_2_4, replace" could only be a no-op (non-numeric codes present)
* or, had it succeeded, turn every year_2_4=="4" comparison into a type mismatch.
* NOTE(review): the original author's open question ("What is L?") is unresolved.
* The sort key is spelled out so "first record" does not depend on whatever
* order the data happen to be in.
cap drop first_4year
bysort cid (enrollment_begin): gen byte first_4year = (year_2_4[1]=="4")

// IDEA FOR LATER: Test with 2-year students getting into 4-year college (!)

cap drop aux

keep if first_4year==1 // 496,661 observations deleted
drop first_4year

/*
* Total number of individuals:
egen tag_ind=tag( cid)
ta tag // 65,288 individuals
drop tag
*/

* Dates: show both spell boundaries as daily dates.
format enrollment_begin enrollment_end %td

* Flag individuals whose earliest record has no start date. Missing dates sort
* last, so the first record is missing only when all of the person's dates are.
bysort cid (enrollment_begin): gen entry_empty = (enrollment_begin==.) if _n==1
* If the first start date is missing, try to borrow it from the next record.
by cid: replace enrollment_begin = enrollment_begin[_n+1] if entry_empty==1 & _n==1 // 0 changes, probably because only observation

* Copy the flag to every record of the person, then drop flagged persons.
bysort cid (entry_empty): replace entry_empty = entry_empty[1]
drop if entry_empty==1 // 510 observations
drop entry_empty

* Entry year: earliest calendar year among the person's spell start dates
* (this variable is key).
bysort cid: egen entry_year = min(year(enrollment_begin))
cap drop entry_year_temp

* First school: the institution code on the person's earliest record.
bysort cid (enrollment_begin): gen first_school = college_code_branch[1]

**************************************
********* 2)  Outcomes ***************
**************************************

* Graduated from any school: at least one record carries a graduation date.
cap drop graduated_any
bysort cid: egen graduated_any = max(grad_date!="")

* Graduated from first school: a graduation date on a record at first_school.
cap drop graduated_first
bysort cid: egen graduated_first = max(grad_date!="" & first_school==college_code_branch)

* Transfer outcomes. A record counts as "elsewhere" when the person never
* graduated from the first school and the record is at a different school.
tempvar elsewhere
gen byte `elsewhere' = graduated_first==0 & first_school!=college_code_branch // attends a different school

cap drop transfer
bysort cid: egen transfer = max(`elsewhere')

* Split by the level of the receiving school (anything other than "4" counts as 2-year).
cap drop transfer_2yr
bysort cid: egen transfer_2yr = max(`elsewhere' & year_2_4!="4")
cap drop transfer_4yr
bysort cid: egen transfer_4yr = max(`elsewhere' & year_2_4=="4")

drop `elsewhere'

* Graduate-school outcomes: class_level codes D, G, L, M and P are treated as
* graduate enrollment; M alone marks a masters record.
cap drop grad
gen grad = 1 if inlist(class_level,"D","G","L","M","P")
gen master = 1 if class_level=="M"

* Problem: some individuals' earliest record is already graduate school.
* Drop every record of those individuals.
cap drop dummy
cap drop dummy_aux
bysort cid (enrollment_begin): gen dummy_aux = (grad[1]==1)
drop if dummy_aux==1 // 4,173 observations dropped
drop dummy_aux

* Turn both flags into person-level "ever" indicators (1 sorts before missing,
* so the first value within cid is 1 whenever any record is flagged).
foreach outcome in master grad {
	bysort cid (`outcome'): replace `outcome' = `outcome'[1]
	replace `outcome' = 0 if `outcome'==.
}
  
* Time spent studying: length of each enrollment spell, in days.
gen enrollment_time = enrollment_end - enrollment_begin
summarize enrollment_time, detail
* Weird numbers in the tails: spells above p99 or below p1 are set to missing
* (i.e. trimmed, not winsorized). // 6675 obs above p99, 4533 obs below p1
replace enrollment_time = . if enrollment_time>r(p99) | enrollment_time<r(p1)

* Person-level total across spells.
bysort cid: egen total_enrollment_time = total(enrollment_time) // in days

* STEM and other major outcomes: bring in the person-level major classification,
* keeping every master record whether or not it matches.
merge n:1 cid using "$dir\data\Person_level_NSC_CCP_withSelectivity_test4.dta", ///
	keepusing(classification) keep(1 3) nogen

* major_data marks people with a non-empty classification; the four major
* dummies are 0/1 for them and missing for everyone else.
gen major_data = 1 if classification!=""
foreach field in stem business vocation art {
	gen `field' = classification=="`field'" if major_data==1
}

* relevant outcomes from this section are total_enrollment_time graduated_first graduated_any grad master
 * stem business vocation art transfer transfer_2yr transfer_4yr
 
* Person-level outcomes file: one tagged row per cid.
preserve
	keep cid entry_year total_enrollment_time graduated_first graduated_any grad master ///
		stem business vocation art transfer transfer_2yr transfer_4yr
	rename grad grad_ever
	rename master masters_ever
	cap drop tag
	egen tag = tag(cid)
	keep if tag==1
	save "$dir\data\aux_outcomes.dta", replace
restore


* School-type indicators built from the sector strings.
* NOTE(review): "nonprof" below is kept exactly as written; presumably it
* resolves (via variable abbreviation) to nonprof_forprof - confirm.
generate private_school = (pub_priv=="Private")
generate public_school = (nonprof_forprof == "Public (P)")
generate private_non_profit = (nonprof == "Private Non-Profit (N)")

* Initial school-level variables: one record at the first school per person.
preserve
	keep if college_code_branch==first_school
	by cid, sort: keep if _n==1
	keep cid college_code_branch college_name college_state private_school public_school private_non_profit
	save "$dir\data\aux_initial_college_data.dta", replace
restore

* Demographics: birth year and entry year, one tagged row per cid.
preserve
	keep cid birth_year entry_year
	cap drop tag
	egen tag = tag(cid)
	keep if tag==1
	save "$dir\data\aux_entry_year.dta", replace
restore

*AL: reduce to one observation per individual (only cid and the tag remain)
keep cid
egen tag = tag(cid)
keep if tag==1 // Sample has 64,144 individuals

* Census vars: attach them to the cross section and store the result separately;
* the data in memory are restored afterwards.
preserve
	merge 1:1 cid using "$dir\data\census_vars.dta", keep(master match)
	save "$dir\data\aux_census_vars.dta", replace
restore

// AL: up to here we have a cross section, one obs per individual of individuals who first attended a 4-year college


*********************************
*** 3) Merge to ccp *************
*********************************

* One person to many loan-level panel records; people without a match are kept.
cap drop _merge
merge 1:m cid using "$dir\data\loanlevel94p_panel.dta", keep(master match)

* Non-merged will have zero balances

* AL: checkpoint, so the very long merge above does not have to be repeated
save "$dir\data\aux_after_merge.dta", replace




*********************************
*** 4) debt outcomes ************
*********************************

* Restart from the checkpoint written right after the CCP merge.
use "$dir\data\aux_after_merge.dta", replace

// individuals in CCP, rest 0 debt
* Records with a missing SL_loanlevel3 (e.g. people with no CCP match) get a zero balance.
replace SL_loanlevel3=0 if SL_loanlevel3==. // 18,314 observations are .


* merge to entry year: 
* keep(3) retains only records whose cid is found in the entry-year file.
cap drop _merge
merge n:1 cid using "$dir\data\aux_entry_year.dta", keep(3) nogen
cap drop tag

* Compute originations
* totbal = sum of SL_loanlevel3 over all records of the same person and period t.
sort cid t
bys cid t: egen totbal=sum(SL_loanlevel3)
replace totbal=0 if totbal==.
 
* SL_origination flags a record when (a) it is the person's first record, the
* balance is non-zero and t is not 200303, or (b) the balance is at least 500
* above the balance on the person's previous record.
* NOTE(review): the data are still at loan level here, so the "previous record"
* can belong to the same t - confirm this is intended.
cap drop SL_origination
gen SL_origination=0
bys cid (t): replace SL_origination=1 if _n==1 & totbal !=0 & t!=200303 // AL: origination initial quarter
bys cid (t): replace SL_origination=1 if totbal[_n]>=(totbal[_n-1]+500) & _n>1  

replace SL_origination=0 if SL_origination==.

* SL_firstorig marks the record on which the running count of originations first reaches 1.
by cid (t), sort: gen byte SL_firstorig= sum(SL_origination)==1 & sum(SL_origination[_n-1])==0
rename totbal SL_totbal

* SL_orig_bal: balance increase on origination records (0 elsewhere); on the
* first origination the whole balance counts as newly originated.
cap drop SL_orig_bal
gen SL_orig_bal=0 
bys cid (t): replace SL_orig_bal = SL_totbal-SL_totbal[_n-1] if SL_origination==1
replace SL_orig_bal = SL_totbal if SL_firstorig==1 // initial balance if first origination             


* Rescale the period code and split it into a calendar year and a remainder.
* NOTE(review): presumably t is coded YYYYMM, so "quarter" holds the month
* (3, 6, 9 or 12) rather than a quarter number - confirm.
replace t = t/100
generate year = round(t)

* AL: for each individual, compute total new debt each quarter and year
cap drop quarter
generate quarter = round(t*100-year*100)

* Use the largest origination amount recorded within each person-period.
bysort cid t: egen peak_orig = max(SL_orig_bal)
replace SL_orig_bal = peak_orig
drop peak_orig

* keep one observation per quarter
bysort cid year quarter: keep if _n==1 // still 64144 individuals

* Student loan balance at the end of each of the first six years after entry.
* The balance from quarter==12 is preferred; if that record is missing for some
* reason, fall back to quarter 9, then 6, then 3 of the same year. After each
* step the value found is copied to every record of the person.
forvalues x = 1/6 {
	local offset = `x' - 1
	generate balance_year`x' = .
	foreach q in 12 9 6 3 {
		replace balance_year`x' = SL_totbal if year==entry_year+`offset' & quarter==`q' & balance_year`x'==.
		bysort cid (balance_year`x'): replace balance_year`x' = balance_year`x'[1] if balance_year`x'==.
	}
}

* Yearly increase in student debt: sum the originations within person-year
* and store the total back under the name SL_orig_bal.
cap drop aux_increase
bysort cid year: egen aux_increase = total(SL_orig_bal)
cap drop SL_orig_bal
rename aux_increase SL_orig_bal

* keep one observation per year
bysort cid year: keep if _n==1

* Originations in college years 1..6 (year 1 = entry year), copied to every
* record of the person.
forvalues k = 1/6 {
	cap drop originations_year`k'
	generate originations_year`k' = SL_orig_bal if year==entry_year+`k'-1
	bysort cid (originations_year`k'): replace originations_year`k' = originations_year`k'[1]
}

* Collapse to one record per person (the earliest year) and keep the measures.
bysort cid (year): keep if _n==1

keep cid year originations_year* balance_year* entry_year

* Total student debt after 4 years, obtained by adding yearly originations, as
* an alternative to the balance measure. rowtotal() already treats missing
* components as zero, so the originations do not need to be zero-filled first.
* (The earlier version zero-filled them in two identical loops and also built a
* 6-year total; both were discarded by the keep below, so they are omitted.)
egen total_student_debt_4 = rowtotal(originations_year1 originations_year2 originations_year3 originations_year4)

keep cid balance_year4 total_student_debt_4

/*
* get school level vars
merge n:1 cid using "$dir\data\aux_initial_college_data.dta", keep(1 3) nogen

* get outcomes
merge n:1 cid using "$dir\data\aux_outcomes.dta", keep(1 3) nogen


* merge to census
merge n:1 cid using "$dir\data\aux_census_vars.dta", keep(1 3) nogen
// Data has 61,819 individuals
*/

* People with no balance record in year 4 are treated as having zero debt.
replace balance_year4=0 if balance_year4==.
replace total_student_debt_4=0 if total_student_debt_4==.

save "$dir\data\origdata.dta", replace


***********************************
**** 5) AUX: prepare tuition data *
***********************************

* use "$dir\data\tuitiondata.dta", replace
use "$dir\data\tuition_updated.dta", clear

rename year academicyear

* CPI: (source https://www.bls.gov/cpi/tables/supplemental-files/historical-cpi-u-201811.pdf)
* NOTE(review): the figures appear to be the January CPI-U reading of each year;
* confirm against the BLS table. Two entries were corrected against that series:
*   2012: 226.655 -> 226.665 (transposed digits)
*   2016: 233.916 -> 236.916 (the old value duplicated the 2014 entry)
gen cpi=.
replace cpi=138.1 if academicyear==1992
replace cpi=142.6 if academicyear==1993
replace cpi=146.2 if academicyear==1994
replace cpi=150.3 if academicyear==1995
replace cpi=154.4 if academicyear==1996
replace cpi=159.1 if academicyear==1997
replace cpi=161.6 if academicyear==1998
replace cpi=164.3 if academicyear==1999
replace cpi=168.8 if academicyear==2000
replace cpi=175.1 if academicyear==2001
replace cpi=177.1 if academicyear==2002
replace cpi=181.7 if academicyear==2003
replace cpi=185.2 if academicyear==2004
replace cpi=190.7 if academicyear==2005
replace cpi=198.3 if academicyear==2006
replace cpi=202.4 if academicyear==2007
replace cpi=211.1 if academicyear==2008
replace cpi=211.143 if academicyear==2009
replace cpi=216.687 if academicyear==2010
replace cpi=220.223 if academicyear==2011
replace cpi=226.665 if academicyear==2012
replace cpi=230.280 if academicyear==2013
replace cpi=233.916 if academicyear==2014 // everything as of 2014 dollars
replace cpi=233.707 if academicyear==2015
replace cpi=236.916 if academicyear==2016
replace cpi=242.839 if academicyear==2017

* Price index relative to the 2014 entry (index = 1 in 2014).
g cpi_index = cpi/233.916

* Real tuition: indist expressed in 2014 dollars. Years outside 1992-2017 have
* no CPI entry and therefore end up with missing tuition.
cap drop tuition
g tuition = indist
replace tuition = tuition/cpi_index

xtset unitid academicyear

* Year-over-year change in real tuition, in dollars and as a share of last
* year's tuition. Institution-years without a computable change are dropped.
cap drop deltatuition
gen deltatuition=tuition-L.tuition
cap drop perc_tuit
gen perc_tuit=deltatuition/L.tuition
drop if perc_tuit==.

keep academicyear unitid tuition deltatuition perc_tuit cpi cpi_index

* Sample selection criteria:
* 1) keep students first enrolled between 1998 and 2012
* 2) keep tuition shocks between 2001 and 2016
* NOTE(review): the code below only marks shocks in academic years 2005-2014;
* reconcile with criterion 2 above.

* The three steps are deliberately kept as separate loops so that variables are
* created in the same order as before (all dummy_change_*, then number_*, then
* max_shock_*); variable ranges such as max_shock_5-max_shock_30 depend on it.

* ---- Percentage shocks (thresholds in percent) ----
local pct_cuts 0 5 10 15 20 25 30

* 1) mark tuition changes of at least x percent, academic years 2005-2014 only
foreach x of local pct_cuts {
	cap drop dummy_change_`x'
	g dummy_change_`x' = perc_tuit>=`x'/100 if perc_tuit<. & inrange(academicyear,2005,2014)
}

* 2) count number of shocks by school
foreach x of local pct_cuts {
	cap drop number_`x'
	bysort unitid: egen number_`x' = total(dummy_change_`x')
}

* 3) for schools with more than one shock, keep only the largest
*    (shocks tied for the largest all stay marked)
foreach x of local pct_cuts {
	bysort unitid: egen max_shock_`x' = max(perc_tuit) if dummy_change_`x'==1 & number_`x'>1
	replace dummy_change_`x'=0 if max_shock_`x'>perc_tuit & dummy_change_`x'==1 & number_`x'>1
}

* ---- Dollar-amount shocks (thresholds in real dollars) ----
local usd_cuts 100 500 800 900 1000 1100 1200 1500 2000 2500

* 1) mark tuition changes of at least x dollars, academic years 2005-2014 only
*    (a stray apostrophe used to turn this cap drop into a silent no-op)
foreach x of local usd_cuts {
	cap drop dummy_change_`x'
	g dummy_change_`x' = deltatuition>=`x' if deltatuition<. & inrange(academicyear,2005,2014)
}

* 2) count number of shocks by school
foreach x of local usd_cuts {
	cap drop number_`x'
	bysort unitid: egen number_`x' = total(dummy_change_`x')
}

* 3) for schools with more than one shock, keep only the largest
foreach x of local usd_cuts {
	bysort unitid: egen max_shock_`x' = max(deltatuition) if dummy_change_`x'==1 & number_`x'>1
	replace dummy_change_`x'=0 if max_shock_`x'>deltatuition & dummy_change_`x'==1 & number_`x'>1
}


save "$dir\data\tuitiondata_aux.dta", replace


*********************************
*** 6) tuition data *************
*********************************
use "$dir\data\origdata.dta", clear

* For each individual, attach the initial college and the entry year, then
* build a panel of years around the entry year.
merge 1:1 cid using "$dir\data\aux_initial_college_data.dta", nogen
merge 1:1 cid using "$dir\data\aux_entry_year.dta", nogen

keep cid entry_year college_code_branch college_name

* Numeric person id plus a time variable, as required for tsset / tsfill.
generate year = entry_year
egen gcid = group(cid)
tsset gcid year

keep if inrange(entry_year,1998,2016) // 2016 allows us to calculate 4 year tuition

* Give every person a row for every year observed in the sample.
tsfill, full

* tsfill leaves the other variables empty on the new rows: copy the person's
* own values into them (empty strings sort first, missing numbers sort last).
foreach v in cid college_code_branch college_name {
	bysort gcid (`v'): replace `v' = `v'[_N] if `v'==""
}
bysort gcid (entry_year): replace entry_year = entry_year[1] if entry_year==.

* Numeric institution code (dashes removed), named to match the crosswalks.
destring college_code_branch, replace ignore("-")
rename college_code_branch opeid
rename year academicyear


* Manually examine 303 non-merged institutions
* Each line below rewrites one or more opeid values to another opeid value; the
* trailing comment names the institution as recorded by the original author.
* The statements run top to bottom and several of them feed into later ones
* (a value written by one line can be matched by a following line), so the
* order of this list must not be changed without re-checking the result.
replace opeid = 104700 if opeid>=104703 & opeid<=104710 // TROY UNIVERSITY
replace opeid = 104800 if opeid==104754 // TROY DOTHAN
replace opeid = 104900 if opeid==104755 // TROY MONTGOMERY
replace opeid = 130500 if opeid>=130501 & opeid<=130503 // STANFORD
replace opeid = 131700 if opeid==131703 // UNIVERSITY OF CALIFORNIA - SAN DIEGO
replace opeid = 132900 if opeid==132901 // UNIVERSITY OF THE PACIFIC MCGEORGE SCHOOL OF LAW
replace opeid = 137400 if opeid==137413 // ALBERTUS MAGNUS - NEW DIMENSIONS
replace opeid = 140200 if opeid==140201 // QUINNIPIAC UNIVERSITY  SCHOOL OF LAW
replace opeid = 144400 if opeid==144401 | opeid==144402 // GEORGE WASHINGTON
replace opeid = 144500 if opeid==144501 | opeid==144502 | opeid==144503 // GEORGETOWN UNIVERSITY - LAW SCHOOL
replace opeid = 147900 if opeid==147901 | opeid==147902 // EMBRY
replace opeid = 149900 if opeid==149901 // EVEREST UNIVERSITY - SOUTH ORLANDO
replace opeid = 153400 if opeid==149904 // EVEREST UNIVERSITY - TAMPA
replace opeid = 153400 if opeid==149905 // EVEREST UNIVERSITY - BRANDON
replace opeid = 2599800 if opeid==149907 // EVEREST UNIVERSITY - LARGO
replace opeid = 2599800 if opeid==149909 // EVEREST UNIVERSITY - JACKSONVILLE
replace opeid = 814600 if opeid==149910 // EVEREST UNIVERSITY - POMPANO
replace opeid = 153400 if opeid==153408 | opeid==153409 | opeid==153410  // EVEREST UNIV - TAMPA BRANDON CAMPUS
replace opeid = 153600 if opeid==153601 // UNIVERSITY OF MIAMI - LAW
replace opeid = 153700 if opeid==153704 // NEW COLLEGE OF FLORIDA
replace opeid = 156400 if opeid==156401 // EMORY
replace opeid = 158000 if opeid==158001 // MERCER UNIVERSITY SOUTHERN SCHOOL OF PHARMACY
replace opeid = 181300 if opeid==181301 // INDIANA UNIVERSITY- PURDUE UNIVERSITY COLUMBUS
replace opeid = 190000 if opeid==190001 // WILLIAM PENN WST DES MOINE ACCELERATED
replace opeid = 193700 if opeid==193702 // OTTAWA UNIVERSITY-KANSAS CITY
replace opeid = 202900 if opeid==202901 // TULANE
replace opeid = 207700 if opeid==207702 | opeid==207705 // JOHNS HOPKINS UNIVERSITY ARTS,SCIENCES ENGINEERING
replace opeid = 215500 if opeid>=215502 & opeid<=215511 // HARVARD
replace opeid = 244100 if opeid==244101 // UNIVERSITY OF SOUTHERN MISSISSIPPI-GULF PARK
replace opeid = 256800 if opeid==256801 // NEVADA STATE COLLEGE
replace opeid = 260700 if opeid==260701 | opeid==260702 // FAIRLEIGH DICKINSON UNIVERSITY - MADISON
replace opeid = 262900 if opeid==262923 // RUTGERS - THE STATE UNIVERSITY OF NJ - BIOMEDICAL
replace opeid = 263200 if opeid==263201 // SETON HALL UNIVERSITY - LAW STUDENTS
replace opeid = 267800 if opeid>=267801 & opeid<=267817 // BRYANT & STRATTON COLLEGE
replace opeid = 279100 if opeid>=279101 & opeid<=279102 // PACE UNIVERSITY - PLEASANTVILLE
replace opeid = 289400 if opeid==289401 // UNIVERSITY OF ROCHESTER- EASTMAN SCHOOL OF MUSIC
replace opeid = 290300 if opeid==290301 // YESHIVA UNIVERSITY
* NOTE(review): 1072700 is sent to 309901 here and 309901 is sent on to 309900
* further down, while the 1072701-1072754 branches are later mapped TO 1072700
* and stay there. Confirm that DeVry is meant to end up under two different codes.
replace opeid = 309901 if opeid==1072700 // DEVRY
replace opeid = 318400 if opeid==318401 // UNIVERSITY OF OKLAHOMA
replace opeid = 322500 if opeid==322501 // WARNER PACIFIC COLLEGE
replace opeid = 325600 if opeid==325601 // DREXEL UNIVERSITY - HEALTH SCIENCES
replace opeid = 332900 if opeid==332901 // PENN STATE-HERSHEY PARK MED CTR
replace opeid = 338800 if opeid==338801 // VILLANOVA UNIVERSITY - LAW
replace opeid = 340400 if opeid==340403 // JOHNSON & WALES- NORTH MIAMI
replace opeid = 341000 if opeid==341001 // ROGER WILLIAMS UNIVERSITY
replace opeid = 349500 if opeid==349503 // JOHNSON UNIVERSITY - FLORIDA
replace opeid = 353000 if opeid==353006 // UNIVERSITY OF TENNESSEE HEALTH SCIENCE CENTER
replace opeid = 353500 if opeid==353502 // VANDERBILT UNIVERSITY -MED SCHOOL
replace opeid = 363200 if opeid==363204 // TEXAS A&M UNIVERSITY - GALVESTON
* The range on the second Stevens-Henager line already covers the single code
* on the first one.
replace opeid = 367400 if opeid==367401 // STEVENS-HENAGER OREM
replace opeid = 367400 if opeid>=367401 & opeid<=367411  // STEVENS-HENAGER OREM
replace opeid = 369100 if opeid==369110  // MONTEREY INSTITUTE OF INTERNATIONAL STUDY
replace opeid = 372600 if opeid==372613  // AMERICAN NATIONAL UNIVERSITY
replace opeid = 224700 if opeid==384256  // CONCORDIA UNIVERSITY - ANN ARBOR
replace opeid = 393800 if opeid>=393801 & opeid<=393811  // INTER AMERICAN UNIV OF PUERTO RICO- LAW
replace opeid = 407200 if opeid==407215 | opeid==407221 // NORTHWOOD UNIVERSITY - WEST PALM BEACH
replace opeid = 2120900 if opeid==455304  | opeid==832902 | opeid==844302 | opeid==732708 | (opeid>=3070402 & opeid<=3087604) // ITT TECHNICAL INSTITUTE
replace opeid = 458600 if opeid==458616  // KAPLAN UNIVERSITY - LEWISTON
replace opeid = 461800 if opeid==461802   // SPENCERIAN COLLEGE
replace opeid = 461900 if opeid==461901   // SULLIVAN UNIVERSITY
replace opeid = 464200 if opeid>=464201 & opeid<=464208  // GLOBE UNIVERSITY - EAU CLAIRE
replace opeid = 464600 if opeid>=464602 & opeid<=464609  // MINNESOTA SCHOOL OF BUSINESS
replace opeid = 472900 if opeid>=472901 & opeid<=472906  // MOUNT WASHINGTON COLLEGE - CONCORD
replace opeid = 479900 if opeid==479901 // MONROE COLLEGE - NEW ROCHELLE
replace opeid = 708500 if opeid==708501 // MOUNT VERNON NAZARENE UNIVERSITY
replace opeid = 814600 if opeid==814604 // EVEREST COLLEGE - MERRIONETE PARK
replace opeid = 832200 if opeid==832201 // DEVRY UNIVERSITY - FREMONT
replace opeid = 869400 if opeid>=869401 & opeid<=869422 // RASMUSSEN COLLEGE - BROOKLYN PARK
replace opeid = 1085100 if opeid==1085102 // ANTHEM INSTITUTE -  NORTH BRUNSWICK
* NOTE(review): as written, the condition below only matches opeid 2179902 (the
* "<=2179947" part adds nothing). It may have been meant as a range
* (>=2179902 & <=2179947), but changing it would pre-empt the explicit Argosy
* list further down that sends other 21799xx codes to 723600 - confirm intent
* before touching either line.
replace opeid = 2179900 if opeid==2179902 & opeid<=2179947 // ARGOSY UNIVERSITY
replace opeid = 4051300 if opeid==4051301 | opeid==4051306 // ART INSTITUTE OF PHOENIX-AI OF TUCSON
replace opeid = 2491100 if opeid==2491102 //BECKFIELD COLLEGE - TRI-COUNTY
replace opeid = 1116600 if opeid==1116604 //BROADVIEW UNIVERSITY
replace opeid = 2110800 if opeid==2110801 //CALIFORNIA COLLEGE
replace opeid = 3098600 if opeid==3098601 //CAREER TECHNOLOGY CENTER OF LACKAWANNA COUNTY
replace opeid = 3426400 if opeid==3426406 //CHUBB INSTITUTE - CHICAGO - IL
replace opeid = 2594300 if opeid>=2594302 & opeid<=2594304 //COLLEGE AMERICA-DENVER
replace opeid = 3120300 if opeid==3120301 //COLLEGE AMERICA-FLAGSTAFF
replace opeid = 1014800 if opeid>=1014802 & opeid<=1014805 //COLORADO TECHNICAL UNIVERSITY
replace opeid = 1072700 if opeid==1072722 //DEVRY  UNIVERSITY - FT. WASHINGTON
replace opeid = 309900 if opeid==309901 //DEVRY UNIVERSITY
replace opeid = 1072700 if opeid>=1072701 & opeid<=1072754 //DEVRY UNIVERSITY; LOL THERE ARE SOOOOO MANY!!!!
replace opeid = 3010600 if opeid==3010622 //ECOTECH INSTITUTE - AURORA
replace opeid = 1019800 if opeid>=1019815 & opeid<=1019816 //ECPI UNIVERSITY - INNSBROOK
replace opeid = 1185800 if opeid==1185802 //EVEREST COLLEGE - BURR RIDGE
replace opeid = 2250600 if opeid==2250602 //EVEREST COLLEGE- CUCAMONGA
replace opeid = 2298500 if opeid==2298501 //EVEREST COLLEGE-FORT WORTH
replace opeid = 2295000 if opeid==2295002 //EVEREST COLLEGE-MESA
replace opeid = 1112300 if opeid==1112301 //EVEREST INSTITUTE - NORCROSS
replace opeid = 149900 if opeid==149912 //EVEREST UNIVERSITY - EVEREST COLLEGE - KANSAS CITY
replace opeid = 2599800 if opeid==2599801 //EVEREST UNIVERSITY - LAKELAND
replace opeid = 2599800 if opeid>=2599805 & opeid<=2599810 //EVEREST UNIVERSITY-JACKSONVILLE
replace opeid = 2208700 if opeid==3010613 //GOLF ACADEMY OF AMERICA - PHOENIX
replace opeid = 2158400 if opeid>=2158401 & opeid<=2158417 //HARRISON COLLEGE - ANDERSON
replace opeid = 962100 if opeid>=962103 & opeid<=962110 //HERZING UNIVERSITY
replace opeid = 2160300 if opeid>=2160303 & opeid<=2160307 //INTERNATIONAL ACADEMY OF DESIGN - COLLINS COLLEGE
replace opeid = 3031400 if opeid==3031403 //INTERNATIONAL ACADEMY OF DESIGN - LE CORDON BLEU
replace opeid = 2321900 if opeid==2286502 | (opeid>=2291601 & opeid<=2291604) | opeid==2361004 //ITT TECHNICAL INSTITUTE
replace opeid = 207700 if opeid==207703 //JOHNS HOPKINS UNIVERSITY SCHOOL OF EDUCATION
replace opeid = 458600 if opeid==458612 //KAPLAN UNIVERSITY - HAGERSTOWN
replace opeid = 986300 if opeid==986301 //LANCASTER INSTITUTE FOR HEALTH EDUCATION NURSING
replace opeid = 2616700 if opeid>=2616703 & opeid<=2616705 //LE CORDON BLEU COLLEGE OF CULINARY ARTS - BOSTON
replace opeid = 275100 if opeid==275101 //LONG ISLAND UNIV - BRENTWOOD
replace opeid = 1173200 if opeid==1173201 //MAYO CLINIC COLL OF MEDICINE - NURSING/ANESTHESIA
replace opeid = 278200 if opeid==278202 //NYIT COLLEGE OF OSTEOPATHIC MEDICINE
replace opeid = 2205200 if opeid==2205203 | opeid==2205205 //SANFORD BROWN COLLEGE -ST PETERS
replace opeid = 282500 if opeid==282501 //ST JOSEPH'S COLLEGE - SUFFOLK
replace opeid = 927000 if opeid==927004 | opeid==927005 | opeid==927003 //THE ART INSTITUTE OF CHARLESTON
* The Argosy list below names 2179919 twice; the repetition has no effect.
replace opeid = 723600 if opeid==2179932 | opeid==2179944 | opeid== 2179945 | opeid==2179947 | opeid==2179936 | opeid==2179919 | opeid==2179919 | opeid==2179903 | opeid==2179905 | opeid==2179929 | opeid==2179928 | opeid==2179913 | opeid==2179909 | opeid==2179901 | opeid==2179935  | opeid==2179941 //ARGOSY
replace opeid = 887800 if opeid==887805 | opeid==887806 // THE ART INSTITUTE OF TAMPA
replace opeid = 1239300 if opeid==1239302 | opeid==1239301 // THOMAS JEFFERSON UNIVERSITY  ALLIED HEALTH SCIENCE
replace opeid = 1014200 if opeid==1014205 // TOURO COLLEGE - LAW
replace opeid = 1039400 if opeid>=1039401 & opeid<=1039409 // UMDNJ
replace opeid = 2559300 if opeid==2559309 // UNITED EDUCATION INSTITUTE-CHULA VISTA
replace opeid = 131500 if opeid==131501 // UNIVERSITY OF CALIFORNIA-EXTENSION
replace opeid = 232500 if opeid==232503 | opeid==232501 // UNIVERSITY OF MICHIGAN-LAW
replace opeid = 1039500 if opeid==1039501 // UNIVERSITY OF SAN DIEGO LAW SCHOOL
replace opeid = 3010600 if opeid<=3010634 & opeid>=3010602 // VIRGINIA COLLEGE - AUGUSTA
replace opeid = 2171500 if opeid==2171545 // WESTERN INTERNATIONAL UNIVERSITY
replace opeid = 290300 if opeid==290303 // YESHIVA UNIVERSITY-MIDTOWN-STERN COLLEGE FOR WOMEN

* Barrons data: person-level selectivity of the first school. Only people whose
* opeid is found in the Barron's file are written to the auxiliary file.
preserve
	keep cid opeid
	egen tag = tag(cid)
	keep if tag==1
	merge n:1 opeid using "$dir\data\barrons2001.dta", keepusing(competitive) keep(match)
	keep cid competitive
	* selectivity: competitive codes of 4 or lower count as selective
	generate first_enrollment_selective_v2 = (competitive<=4)
	save "$dir\data\aux_barons2001.dta", replace
restore

* Merge opeid identifier (opeid -> unitid crosswalk); unmatched rows are kept.
cap drop _merge
merge m:1 opeid using "$dir\data\ope_unitid_match.dta", keep(master match) nogen

* MERGE to tuition data: only institution-years present in the tuition file survive.
cap drop _merge
merge m:1 unitid academicyear using "$dir\data\tuitiondata_aux.dta", keep(match) nogen

* for every individual, compute 4-year tuition "price": the sum of tuition over
* the entry year and the three following years (whichever of them are present),
* expressed in units of 10,000
bysort gcid: egen tuition_bill = total(tuition) if  academicyear>=entry_year & academicyear<=entry_year+3
replace tuition_bill = tuition_bill/10000

* Tuition: store the bill once per person, taken from the entry-year row
preserve
	keep if entry_year==academicyear // 34,353 observations
	keep cid opeid tuition_bill
	/*
	qui su tuition_bill, d
	replace tuition_bill=r(p99) if tuition_bill>r(p99) & tuition_bill<.
	replace tuition_bill=r(p1) if tuition_bill<r(p1)
	*/
	save "$dir\data\tuition_bill_full.dta", replace
restore

xtset gcid academicyear

* Helper variables from the shock construction are no longer needed.
cap drop max_shock_5-max_shock_30 max_shock_500-max_shock_2500
cap drop number_5-number_30 number_500-number_2500

* Reduce the panel to identifiers, shock dummies and tuition changes.
keep cid academicyear dummy_change_* deltatuition perc_tuit

* final file: attach every person-level auxiliary file by cid.
* None of these merges restricts the result, so people that appear only in a
* using file are added as extra rows.

merge n:1 cid using "$dir\data\origdata.dta", nogen
merge n:1 cid using "$dir\data\aux_initial_college_data.dta", nogen
merge n:1 cid using "$dir\data\aux_entry_year.dta", nogen
merge n:1 cid using "$dir\data\aux_census_vars.dta", keepusing(mean_agi2001 median_hhld_income) nogen
merge n:1 cid using "$dir\data\aux_outcomes.dta", nogen
merge n:1 cid using "$dir\data\tuition_bill_full.dta", nogen
merge n:1 cid using "$dir\data\aux_barons2001.dta", nogen
* People without a selectivity record are treated as not selective.
replace first_enrollment_selective_v2 = 0 if first_enrollment_selective_v2==.

* NEW EDUCATION OUTCOMES DATA (Aug 2019):
merge n:1 cid using "$dir\data\education_outcomes_by_age.dta", keep(master match) nogen



keep if inrange(entry_year,1998,2013) // selection criteria 1
* keep if tuition_bill<.

* Total enrollment time: days -> months (30-day months).
replace total_enrollment_time=total_enrollment_time/30

g age = entry_year - birth_year

* Debt measures in units of 10,000.
foreach var in 	balance_year4 total_student_debt_4   {
	replace `var' = `var'/10000
	}	
* Inflation	

* CPI: (source https://www.bls.gov/cpi/tables/supplemental-files/historical-cpi-u-201811.pdf)
* NOTE(review): the figures appear to be the January CPI-U reading of each year;
* confirm against the BLS table. Two entries were corrected against that series:
*   2012: 226.655 -> 226.665 (transposed digits)
*   2016: 233.916 -> 236.916 (the old value duplicated the 2014 entry)
gen cpi=.
replace cpi=138.1 if academicyear==1992
replace cpi=142.6 if academicyear==1993
replace cpi=146.2 if academicyear==1994
replace cpi=150.3 if academicyear==1995
replace cpi=154.4 if academicyear==1996
replace cpi=159.1 if academicyear==1997
replace cpi=161.6 if academicyear==1998
replace cpi=164.3 if academicyear==1999
replace cpi=168.8 if academicyear==2000
replace cpi=175.1 if academicyear==2001
replace cpi=177.1 if academicyear==2002
replace cpi=181.7 if academicyear==2003
replace cpi=185.2 if academicyear==2004
replace cpi=190.7 if academicyear==2005
replace cpi=198.3 if academicyear==2006
replace cpi=202.4 if academicyear==2007
replace cpi=211.1 if academicyear==2008
replace cpi=211.143 if academicyear==2009
replace cpi=216.687 if academicyear==2010
replace cpi=220.223 if academicyear==2011
replace cpi=226.665 if academicyear==2012
replace cpi=230.280 if academicyear==2013
replace cpi=233.916 if academicyear==2014 // everything as of 2014 dollars
replace cpi=233.707 if academicyear==2015
replace cpi=236.916 if academicyear==2016
replace cpi=242.839 if academicyear==2017


* Price index relative to the 2014 entry (index = 1 in 2014).
g cpi_index = cpi/233.916

* Deflate the debt measures.
* NOTE(review): the deflator is looked up by each row's academicyear, so a
* person-level amount such as balance_year4 takes a different real value on
* every panel row, and rows without an academicyear become missing. Confirm
* this is intended rather than deflating by a year tied to the entry year.
foreach var in balance_year4 total_student_debt_4 {
	replace `var' = `var'/cpi_index
	}
* labels:
label var total_enrollment_time "Total months enrolled"	
label var graduated_first "Graduate from 1st school"
label var graduated_any "Graduate from any school"
label var balance_year4 "Balance 4 years after entry (10,000)"
label var total_student_debt_4 "Origination years 1-4 after entry (10,000)"
label var grad_ever "Enrolls in graduate school"
label var masters_ever "Enrolls in Masters"
label var transfer "Transfers from first school"
label var transfer_2yr "Transfers from first school to 2 year school"
label var transfer_4yr "Transfers from first school to 4 year school"
label var age "Age at entry"
label var public_school "First school is public"
label var private_non_profit "First school is private non-profit" 
label var stem "STEM"
label var business "Business"
label var art "Art"
label var vocation "Vocations"
label var first_enrollment_selective_v2 "First school is selective"
label var mean_agi2001 "Mean agi income (10,000)"
label var median_hhld_income "Median hhld income (10,000)"

* Rescale the income variables so that they match the "(10,000)" in their labels.
replace mean_agi2001=mean_agi2001/10000
replace median_hhld_income=median_hhld_income/10000

* NEW VARS: outcomes measured by ages 22, 25, 30 and 35.
foreach age in 22 25 30 35 {
	label var stem_g_by`age' "STEM-g by age `age'"
	label var business_g_by`age' "Bus-g by age `age'"
	label var art_g_by`age' "Art-g by age `age'"
	label var vocation_g_by`age' "Voc-g by age `age'"
	label var gradschool_g_by`age' "Grad-g by age `age'"
	
	label var stem_a_by`age' "STEM-a by age `age'"
	label var business_a_by`age' "Bus-a by age `age'"
	label var art_a_by`age' "Art-a by age `age'"
	label var vocation_a_by`age' "Voc-a by age `age'"
	label var gradschool_a_by`age' "Grad-a by age `age'"
	
	label var bach_g_by`age' "Graduated by age `age'"
	
	label var transfer_up_by`age' "Transf up by age `age'"
	label var transfer_down_by`age' "Transf down by age `age'"
	* was labelled "Transf down", a copy of the line above
	label var transfer_even_by`age' "Transf even by age `age'"
	label var transfer_down_level_by`age' "Transf to 2yr by age `age'"


}

* Main outcomes: short labels that replace the longer ones assigned above.
label var gradschool_g_by35 "Graduate school"
label var balance_year4 "Debt (10,000)"
label var bach_g_by35 "Bachelors"
* was "Bachelors", a copy of the line above.
* NOTE(review): confirm the preferred short label for the transfer outcome.
label var transfer "Transfers"

* for later: one tagged row per individual
egen tag_ind=tag(cid)

* come back to this: people without major data are counted as 0 in every major dummy
replace stem=0 if stem==.
replace business=0 if business==.
replace art=0 if art==.
replace vocation=0 if vocation==.

* fin education data (merged on the state of the first college):
merge n:1 college_state using "$dir\data\fin_education.dta", keep(1 3) nogen

/*
preserve
	* Credit variables:
	use "$dir\data\working_data.dta", replace
	keep if tag_ind==1
	keep cid entry_year
	merge 1:n cid using "$dir\data\SL_delinquency_vars.dta", keep(1 3) nogen
	sort cid year

	g baseline_score = riskscore if entry_year==year
	g has_score = baseline_score<. if entry_year==year

	g four_yr_score = riskscore if year==entry_year+4
	g six_yr_score = riskscore if year==entry_year+6
	g eight_yr_score = riskscore if year==entry_year+8

	g score_2018 = riskscore if year==2018	
	
	g four_yr_delinquency = sl_dq_by_age if year==entry_year+4
	g six_yr_delinquency = sl_dq_by_age if year==entry_year+6
	g eight_yr_delinquency = sl_dq_by_age if year==entry_year+8

	g four_yr_default = SL_default_by_age if year==entry_year+4
	g six_yr_default = SL_default_by_age if year==entry_year+6
	g eight_yr_default = SL_default_by_age if year==entry_year+8
	
	g four_yr_percent = percent_sl_dq if year==entry_year+4
	g four_yr_percent_c = percent_sl_dq_c if year==entry_year+4
	g six_yr_percent = percent_sl_dq if year==entry_year+6
	g six_yr_percent_c = percent_sl_dq_c if year==entry_year+6
	g eight_yr_percent = percent_sl_dq if year==entry_year+8
	g eight_yr_percent_c = percent_sl_dq_c if year==entry_year+8

	g percent_2017 = percent_sl_dq if year==2017
	g percent_2017_c = percent_sl_dq_c if year==2017

	g delinquency_2018 = sl_dq_by_age if year==2018
	g default_2018 = SL_default_by_age if year==2018

	* volatility of score
	by cid: egen score_volatility = sd(riskscore) if year>=entry_year+4
	by cid: egen score_mean = mean(riskscore) if year>=entry_year+4
	by cid: egen max_pct_default = max(percent_sl_dq) if year>=entry_year+4
	
	collapse baseline_score has_score four_yr_score six_yr_score eight_yr_score score_2018 ///
		four_yr_delinquency six_yr_delinquency eight_yr_delinquency four_yr_default six_yr_default eight_yr_default ///
		four_yr_percent four_yr_percent_c six_yr_percent six_yr_percent_c eight_yr_percent eight_yr_percent_c ///
		percent_2017 percent_2017_c delinquency_2018 default_2018 ///
		score_volatility score_mean max_pct_default, by(cid)

	label var baseline_score "Risk score at first enrollment"
	label var has_score "Has a risk score at first enrollment"
	label var four_yr_score "Risk score 4 yrs after first enrollment"
	label var six_yr_score "Risk score 6 yrs after first enrollment"
	label var eight_yr_score "Risk score 8 yrs after first enrollment"

	label var score_2018 "Risk score 2018"
	
	label var four_yr_delinquency "Any student debt delinquency 4 yrs after enrollment"
	label var six_yr_delinquency "Any student debt delinquency 6 yrs after enrollment"
	label var eight_yr_delinquency "Any student debt delinquency 8 yrs after enrollment"

	label var delinquency_2018 "Student debt delinquency 2018"

	label var four_yr_default "Any student debt default 4 yrs after enrollment"
	label var six_yr_default "Any student debt default 6 yrs after enrollment"
	label var eight_yr_default "Any student debt default 8 yrs after enrollment"
	label var default_2018 "Any student debt default 2018"

	label var four_yr_percent "Percent balance delinq. 4 yrs after enrollment"
	label var four_yr_percent_c "Percent balance delinq. 4 yrs after enrollment - conditional"
	label var six_yr_percent "Percent balance delinq. 6 yrs after enrollment"
	label var six_yr_percent_c "Percent balance delinq. 6 yrs after enrollment - conditional"
	label var eight_yr_percent "Percent balance delinq. 8 yrs after enrollment"
	label var eight_yr_percent_c "Percent balance delinq. 8 yrs after enrollment - conditional"

	label var percent_2017 "Percent balance delinq. 2017"
	label var percent_2017_c "Percent balance delinq. 2017 - conditional"
	
	label var max_pct_default "Max percentage delinq."
	label var score_volatility "St dev of risk score"
	label var score_mean "Mean risk score"
	
	save "$dir\data\credit_outcomes.dta", replace
restore	
*/



* Credit outcomes, one record per cid.
* NOTE(review): credit_outcomes.dta is produced by the commented-out block
* above, which itself reads working_data.dta - the file must already exist from
* an earlier run.
merge n:1 cid using "$dir\data\credit_outcomes.dta", keep(1 3) nogen


save "$dir\data\working_data.dta", replace


* Brings new outcome variables; rows that exist only in the using files are
* not added, so the set of rows in working_data is left unchanged.
* The merge type was written as "m:", which is not valid merge syntax; m:1 is
* used on the assumption that both using files hold one record per cid
* (merge stops with an error if that is not the case) - TODO confirm.
use "$dir\data\working_data.dta", clear
merge m:1 cid using "$dir\data\ccp_outcomes_randomized.dta", keep(master match) nogen
merge m:1 cid using "$dir\data\ccp_zip_codes_agi_diff_agi_randomized.dta", keep(master match) nogen
save "$dir\data\working_data.dta", replace
